Notes: Data: 2020-11-03_Switzerland_upto_2018-11-31, Switzerland_2018-12-01_2020-11-03. Pre-processing: After gathering the data, the two datasets were merged and it was cleaned the duplicates and empty values, specially for dates
PACKAGES
#CLEANING PROCESS TO JOIN GATHERED DATA FROM FLICKR
#data <- read.csv("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/2020-11-03_Switzerland_upto_2018-11-31.csv", encoding = "UTF-8" )
#saveRDS(data, "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Compile_2018-11-03.rds")
#data2 <- read.csv("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Switzerland_2018-12-01_2020-11-03.csv", encoding = "UTF-8")
#saveRDS(data2, "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Update_2020-11-03.rds")
# export dataframes to backup the main tables
#d1 <- readRDS("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Compile_2018-11-03.rds")
#d2 <- readRDS("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Update_2020-11-03.rds")
#Join two dataset in a single main dataframe
#raw <- dplyr::bind_rows(d1, d2)
#raw$tags <- gsub(";", " ", raw$tags) # Remove symbol in column tags
#raw$post_title <- gsub(";", " ", raw$post_title) # Remove symbol in column post_title
#raw$post_body <- gsub(";", " ", raw$post_body) # Remove symbol in column post_body
#raw_clean <- subset(raw, select= c("post_guid", "longitude", "latitude", "user_guid", "post_create_date", "post_publish_date", "tags", "post_title", "post_body", "place_guid"))
#raw_clean <- distinct(raw_clean, post_guid, .keep_all = TRUE)
#raw_clean <- filter(raw_clean, longitude >= 0.0001)
#Export the total table for future applications
#write.table(raw_clean,"~/GitHub/Flickr_SwissParks/Join2018-2020.csv", sep=";", dec=".")
# define root folders for data
root_folder <- '~/GitHub/Flickr_SwissParks/' # local folder
setwd<- '~/GitHub/Flickr_SwissParks/'
# set coordinate referencing system (for changing CRS but not reprojecting)
crs_wgs84 <- "+init=epsg:4326" # lat/lng
crs_sng <- "+init=epsg:2056" # Swiss National, CH1903+ / LV95
# set coordinate referencing system (for changing CRS but not reprojecting)
crs_wgs84 <- "+init=epsg:4326" # lat/lng
crs_osm <- "+init=epsg:3857" # OSM projection
crs_bng <- "+init=epsg:27700" # British National Grid, BNG
crs_sng <- "+init=epsg:2056" # Swiss National, CH1903+ / LV95
# Set coordinate systems for reprojecting
proj_wgs84 <- '+proj=longlat +datum=WGS84'
proj_osm <- '+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0.0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs'
proj_bng <- '+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs'
proj_sng <- '+proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs'
# set point at which plot will switch from standard form
options(scipen=7)
# chart theme
t <- theme_bw() +
theme(panel.border=element_blank(), # removes border around chart area
axis.text = element_text(size = 10, colour='#444444'),
axis.ticks = element_line(colour='gray'))
# reproject from lat/lng (WGS84) to another CRS
# ---------------------------------------------
# requires columns labeled 'lat', 'lng' in first two columns, and at least two other columns (4 cols minimum)
reproject <- function(df, proj_crs) {
dataset_map_coords <- df[c("lng", "lat")] # specifying names to avoid mixing x and y up in order...
dataset_map_data <- df[ ,c(3:ncol(df))]
dataset_map <- SpatialPointsDataFrame(coords=dataset_map_coords, data=dataset_map_data)
# set CRS and reproject to OSM for OSM number system
dataset_map@proj4string # check first, should be NA - not yet been set
proj4string(dataset_map) <- CRS(crs_wgs84) # set the current coordinates system
dataset_map <- spTransform(dataset_map, CRS(proj_crs)) # reproject to new CRS
# convert back to dataframe with reprojected cooordinates and then amend to original dataset
newdf <- as.data.frame(dataset_map)
names(newdf)[names(newdf) == "lng"] <- 'crs_x';
names(newdf)[names(newdf) == "lat"] <- 'crs_y'
newdf <- newdf[, c("crs_x", "crs_y")] # just want to keep the reprojected coordinates to append back to dataset
df <- cbind(df, newdf)
return(df)
}
# load in the data files and standardise lat/lng column name for reproject
# ---------------------------------------
# load data file, rename coordinates columns
loaddata <- function(input_file) {
folder <- paste0(root_folder, '')
input_data <- paste0(folder, input_file)
df <- read.csv(input_data, sep = ";", na.strings="0")
# rename latitude and longitude columns to lat and lng
names(df)[names(df) == "latitude"] <- "lat"
names(df)[names(df) == "longitude"] <- "lng"
return(df)
}
#Run the functions with the raw data
input_file <- 'Join2018-2020.csv'
orig <- loaddata(input_file)
orig$lat <- as.numeric(as.character(orig$lat))
orig$lng <- as.numeric(as.character(orig$lng))
orig$is_na = ifelse(is.na(orig$lat), TRUE, FALSE)
orig<-orig[!(orig$is_na=="TRUE"),]
# Drop unneeded columns and rows to slim the dataset
# -----------------------------------------
#### a. Only pictures taken after 2004 (inclusive)
# Creating a subset of the data, just for picture that were taken after 2004
df <- orig
df$post_create <- as.Date(df$post_create_date)
reduced_04 <- filter(df, post_create >= "2004-01-01") # we create a new dataframe just for the data after 2004
reduced_04 <- filter(reduced_04, post_create <= "2020-11-03") # we create a new dataframe just for the data after 2004
trimmed <- reduced_04
rm(df)
# add in reprojection to CH1903+ / LV95
# -------------------------------------
# slice off min. fields to acquire reprojection coords, then rejoin to dataset
# order matters: need lat and lng in first two fields + two other fields for data (any will do)
df <- trimmed[, c(3, 2, 1, 4:10)]
proj_crs <- proj_sng # reproject from WGS84 to Swiss nat.coords
df <- reproject(df, proj_crs)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Bessel 1841 ellipsoid in Proj4
## definition
df <- df[, c(11:12)] # just keep the new coords and then merge back into source
trimmed <- cbind(trimmed, df)
trimmed <- trimmed[, c(1, 4:10, 13,14)]
rm(df)
# rename latitude and longitude columns to lat and lng
names(trimmed)[names(trimmed) == "crs_y"] <- "lat"
names(trimmed)[names(trimmed) == "crs_x"] <- "lng"
df <- trimmed
# create SpatialPointsPolygon for data
df_map_coords <- df[c("lng", "lat")]
df_map_data <- df[ ,c(1:8)]
df_map <- SpatialPointsDataFrame(coords=df_map_coords, data=df_map_data)
proj4string(df_map) <- CRS(proj_sng)
df_map_St <- st_as_sf(df_map)
# preparation of shapefiles of land cover, cantonal limits and parks polygons.
parks <- readOGR(dsn="//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/Swiss_Parks.shp", use_iconv=TRUE, encoding="UTF-8")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS
## = dumpSRS, : Discarded datum CH1903+ in Proj4 definition: +proj=somerc
## +lat_0=46.9524055555556 +lon_0=7.43958333333333 +k_0=1 +x_0=2600000 +y_0=1200000
## +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## OGR data source with driver: ESRI Shapefile
## Source: "\\files.geo.uzh.ch\shared\group\geocomp\jort_franziska_daniela\GIS\Swiss_Parks.shp", layer: "Swiss_Parks"
## with 29 features
## It has 13 fields
## Integer64 fields read as strings: OBJECTID Rechtsgrun
## Warning in readOGR(dsn = "//files.geo.uzh.ch/shared/group/geocomp/
## jort_franziska_daniela/GIS/Swiss_Parks.shp", : Z-dimension discarded
parks <- spTransform(parks, CRS(proj_sng))
landcover <- readOGR("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/LandCover_Parks.shp", use_iconv=TRUE, encoding="UTF-8")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS
## = dumpSRS, : Discarded datum CH1903+ in Proj4 definition: +proj=somerc
## +lat_0=46.9524055555556 +lon_0=7.43958333333333 +k_0=1 +x_0=2600000 +y_0=1200000
## +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## OGR data source with driver: ESRI Shapefile
## Source: "\\files.geo.uzh.ch\shared\group\geocomp\jort_franziska_daniela\GIS\LandCover_Parks.shp", layer: "LandCover_Parks"
## with 39308 features
## It has 5 fields
## Integer64 fields read as strings: OBJECTID Id gridcode
landcover <- spTransform(landcover, CRS(proj_sng))
kanton <- readOGR("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/Swiss_Kanton.shp", use_iconv=TRUE, encoding="UTF-8")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS
## = dumpSRS, : Discarded datum CH1903+ in Proj4 definition: +proj=somerc
## +lat_0=46.9524055555556 +lon_0=7.43958333333333 +k_0=1 +x_0=2600000 +y_0=1200000
## +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +vunits=m
## +no_defs
## OGR data source with driver: ESRI Shapefile
## Source: "\\files.geo.uzh.ch\shared\group\geocomp\jort_franziska_daniela\GIS\Swiss_Kanton.shp", layer: "Swiss_Kanton"
## with 50 features
## It has 20 fields
## Integer64 fields read as strings: ERSTELL_J REVISION_J HERKUNFT_J KANTONSNUM EINWOHNERZ
## Warning in readOGR("//files.geo.uzh.ch/shared/group/geocomp/
## jort_franziska_daniela/GIS/Swiss_Kanton.shp", : Z-dimension discarded
kanton <- spTransform(kanton, CRS(proj_sng))
#Convert a geographical object in a simple feature with geometry
#Reduce the time of processing, making efficient the process of intersection
parks_St <- st_as_sf(parks)
landcover_St <- st_as_sf(landcover)
kanton_St <- st_as_sf(kanton)
# Intersect points (Flickr images) with limits of parks
Flickr_park <- st_intersection(parks_St, df_map_St)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# Intersect points (Flickr images) with land cover.
Flickr_park <- st_intersection(landcover_St, Flickr_park)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# Include data of canton in the points data
Flickr_park <- st_intersection(kanton_St, Flickr_park)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Flickr_Parks_map.png")
ggplot() +
geom_sf(data = kanton_St, fill = "grey", color = "white") +
geom_sf(data = Flickr_park, fill = NA, color = "yellow", size=0.005) +
geom_sf(data = parks_St, fill= NA, color = "dark green", size= 1) +
ggtitle("Flickr pictures in Swiss National Parks") +
theme(plot.title = element_text(hjust = 0.5))+
coord_sf()
rm(trimmed)
rm(df)
rm(t)
rm(df_map)
rm(df_map_data)
rm(df_map_coords)
rm(df_map_St)
rm(landcover)
rm(parks)
rm(kanton)
flickr <- Flickr_park[c(39, 40, 41, 19, 23, 28, 38, 43, 44, 45, 47)]
# rename
names(flickr)[names(flickr) == "post_guid"] <- "photo_id"
names(flickr)[names(flickr) == "user_guid"] <- "USER"
names(flickr)[names(flickr) == "post_create_date"] <- "DATE"
names(flickr)[names(flickr) == "gridcode"] <- "USE"
names(flickr)[names(flickr) == "NAME"] <- "CANTON"
names(flickr)[names(flickr) == "SHAPE_Area"] <- "km2"
names(flickr)[names(flickr) == "tags"] <- "TAGS"
names(flickr)[names(flickr) == "Name"] <- "PARK"
names(flickr)[names(flickr) == "post_title"] <- "TITLE"
names(flickr)[names(flickr) == "post_body"] <- "BODY"
flickr <- flickr %>%
mutate(lng = unlist(map(flickr$geometry,1)),
lat = unlist(map(flickr$geometry,2)))
flickr <- st_drop_geometry(flickr)
#General statistics of each column
summary(flickr$photo_id)
## Length Class Mode
## 77694 character character
#The library dplyr provide several notifications/observations, so we are going to avoid to write in the report.
options(dplyr.summarise.inform = FALSE)
Only pictures that are in categories of land cover that are natural
# Using the table obtained before, we create another subset avoiding the pictures in the land cover 100, 120, 140, 160
# The pictures that are in land cover related to natural categories were considered
flickr_nat<- filter(flickr, USE!=100, USE!=120, USE!=140, USE!=160) # The expression USE! means avoid/reduce
#General statistics of each column
summary(flickr_nat$photo_id)
## Length Class Mode
## 56804 character character
# Application of the filter of the use, only natural land cover
db <- flickr_nat #We create a copy for future use
ddbb <- db # We create another copy for future use
a. Number of visitors each Swiss National Parks, according the land cover,
NA values refers to users or land cover category that doesn’t exist in the park
#Filtering with a dynamic table to identify the parks, user, and the land cover.
landcover <- flickr%>%
group_by(PARK, USE, USER)%>%
summarise(land_picture = n()) #Total number of picture in a land cover, per park, taken by a user.
#Counting the number of users per park in each land cover
landcover <- landcover %>%
group_by(PARK, USE) %>%
summarise(park_landcover = n()) #Total number of user who took pictures in a land cover per park.
#Pivot table that create a matrix of park (rows) and land cover (columns)
lu<- pivot_wider(landcover, names_from = USE, values_from = park_landcover)
landcover_park <- as.data.frame(lu) # We transform the matrix into a dataframe
#Rename of the codes (numbers) with the names of the days
#Land cover categories that are reduced
names(landcover_park)[names(landcover_park)=="100"] <- "Building area"
names(landcover_park)[names(landcover_park)=="120"] <- "Traffic and transportation surface"
names(landcover_park)[names(landcover_park)=="140"] <- "Special settlement areas"
names(landcover_park)[names(landcover_park)=="160"] <- "Recreation and green spaces"
#Land cover categories that are considered for the analysis
names(landcover_park)[names(landcover_park)=="200"] <- "Fruit growing, viticulture, horticulture"
names(landcover_park)[names(landcover_park)=="220"] <- "Arable and Forage Cultivation"
names(landcover_park)[names(landcover_park)=="240"] <- "Alpine farming"
names(landcover_park)[names(landcover_park)=="300"] <- "Forest"
names(landcover_park)[names(landcover_park)=="400"] <- "Lakes and rivers"
names(landcover_park)[names(landcover_park)=="420"] <- "Unproductives"
#Organizing the table by alphabetical order of Parks names
landcover_park <- landcover_park %>% arrange(PARK)
#Exporting the table to csv in the root folder
#write.csv(landcover_park,"~/GitHub/Flickr_SwissParks/Results/LandCover.csv", row.names = FALSE)
#Displaying the table with a better format
kbl(landcover_park)%>%
kable_styling(bootstrap_options = "striped", full_width = T, position = "left", latex_options = c("striped", "repeat_header"))
| PARK | Building area | Traffic and transportation surface | Special settlement areas | Recreation and green spaces | Arable and Forage Cultivation | Alpine farming | Forest | Lakes and rivers | Unproductives | Fruit growing, viticulture, horticulture |
|---|---|---|---|---|---|---|---|---|---|---|
| Biosfera Val Müstair | 53 | 28 | 1 | 6 | 69 | 79 | 77 | 23 | 64 | NA |
| Jurapark Aargau | 124 | 64 | 10 | 17 | 166 | NA | 102 | 43 | 1 | 25 |
| Landschaftspark Binntal | 15 | 10 | 1 | 2 | 52 | 14 | 42 | 17 | 39 | NA |
| Naturpark Beverin | 42 | 52 | 6 | 15 | 127 | 85 | 163 | 78 | 102 | 2 |
| Naturpark Diemtigtal | 7 | 8 | NA | NA | 21 | 41 | 29 | 10 | 19 | NA |
| Naturpark Gantrisch | 66 | 48 | 3 | 9 | 195 | 82 | 139 | 89 | 26 | 9 |
| Naturpark Pfyn-Finges | 113 | 39 | 18 | 24 | 105 | 104 | 80 | 45 | 158 | 38 |
| Naturpark Thal | 29 | 22 | 2 | 1 | 57 | 50 | 87 | 2 | 3 | 6 |
| Parc du Doubs | 93 | 73 | 4 | 8 | 127 | 75 | 162 | 78 | 28 | 4 |
| Parc Ela | 149 | 89 | 16 | 29 | 278 | 158 | 307 | 84 | 162 | 9 |
| Parc Jura vaudois | 173 | 103 | 14 | 36 | 191 | 214 | 280 | 141 | 37 | 13 |
| Parc naturel périurbain du Jorat | NA | 2 | 1 | 1 | 15 | NA | 38 | 3 | NA | NA |
| Parc naturel régional de la Vallée du Trient | 58 | 45 | 42 | 4 | 67 | 81 | 223 | 61 | 220 | 15 |
| Parc naturel régional Gruyère Pays-d’Enhaut | 297 | 190 | 18 | 299 | 636 | 429 | 396 | 103 | 176 | 9 |
| Parc régional Chasseral | 118 | 81 | 13 | 20 | 156 | 172 | 152 | 5 | 5 | 30 |
| Parco Val Calanca | 7 | 4 | 2 | NA | 10 | 11 | 28 | 6 | 11 | NA |
| Regionaler Naturpark Schaffhausen | 22 | 16 | 3 | 3 | 56 | 1 | 39 | 10 | 1 | 12 |
| Schweizerischer Nationalpark | NA | 1 | NA | NA | NA | 1 | 85 | 8 | 60 | NA |
| UNESCO Biosphäre Entlebuch | 25 | 9 | 1 | 4 | 75 | 74 | 68 | 10 | 34 | 1 |
| Wildnispark Zürich Sihlwald | 1 | 7 | NA | NA | NA | NA | 50 | 5 | NA | NA |
rm(lu)
rm(landcover)
b. Area (km2) of national parks
#Filtering in a dynamic table to extract Park and square area
#We need this step because some parks have several polygons (with different square area)
#So the extracted information of square area is provided by the surface of each polygon
#where the points (Flickr pictures) where located.
area_portions <- ddbb %>%
group_by(PARK, km2) %>%
summarise(photos_km = n())
#Summarizing the total area of the polygons that compose the parks where the points where located.
area_park <- area_portions %>%
group_by(PARK) %>%
summarise(km = round(sum(km2),2)) #Round for having just two decimals, and sum for adding all polygons surface
# Obtaining the total area (km2) covered by all parks:
area_park %>%
summarize(area = sum(km))#km2
## # A tibble: 1 x 1
## area
## <dbl>
## 1 5848.
rm(area_portions)
c. Count of pictures in the parks
#Filtering in a dynamic table to extract the number of pictures per Park
photos <- ddbb %>%
group_by(PARK) %>%
summarise(photo = n()) %>% #Total number of pictures per park
mutate(ratio_photos= round((photo / sum(photo))*100, 2)) %>% #Percentage with 2 decimal of pictures
arrange(desc(ratio_photos)) #Sort the table per percentage of pictures
#Filtering in a dynamic table to extract Park and square area#The total number of pictures is:
ddbb %>%
summarize(count = n()) #pictures
## count
## 1 56804
d. Count of Users in the parks
#Filtering in a dynamic table to extract the number of pictures per Park and users
users_flickr <- ddbb %>%
group_by(PARK, USER) %>%
summarise(user_photos = n())
#From the previous filter, we can count the number of users per park
users_flickr <- users_flickr %>%
group_by(PARK) %>%
summarise(user = n())%>%
mutate(ratio_users= round((user / sum(user))*100, 2)) %>%
arrange(desc(ratio_users))
#The total summary about the number of users
users_flickr %>%
summarize(users = sum(user))#users
## # A tibble: 1 x 1
## users
## <int>
## 1 5743
d. Statistic of area (km2), users and photos for each Swiss National Parks: NOTE: THESE VALUES ONLY CONSIDER THE NATURAL LAND COVER (the dismissed categories are: Building area”, “Traffic and transportation surface”, “Special settlement areas”, “Recreation and green spaces”
general <- data.frame("Parks"= area_park$PARK, "Area(km2)"= area_park$km, "#Flickr"= photos$photo, "%Flickr"= photos$ratio_photos, "Flickr/km2"=round((photos$photo/area_park$km),2),"#Users"=users_flickr$user, "%Users"=users_flickr$ratio_users, "Users/km2"=round((users_flickr$user/area_park$km),2), check.names=FALSE )
general <- general %>% arrange(Parks)
kbl(general) %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| Parks | Area(km2) | #Flickr | %Flickr | Flickr/km2 | #Users | %Users | Users/km2 |
|---|---|---|---|---|---|---|---|
| Biosfera Val Müstair | 198.64 | 13327 | 23.46 | 67.09 | 1135 | 19.76 | 5.71 |
| Jurapark Aargau | 299.05 | 6199 | 10.91 | 20.73 | 643 | 11.20 | 2.15 |
| Landschaftspark Binntal | 164.78 | 6052 | 10.65 | 36.73 | 529 | 9.21 | 3.21 |
| Naturpark Beverin | 515.11 | 4345 | 7.65 | 8.44 | 453 | 7.89 | 0.88 |
| Naturpark Diemtigtal | 135.50 | 4251 | 7.48 | 31.37 | 363 | 6.32 | 2.68 |
| Naturpark Gantrisch | 405.68 | 3015 | 5.31 | 7.43 | 357 | 6.22 | 0.88 |
| Naturpark Pfyn-Finges | 276.51 | 2992 | 5.27 | 10.82 | 356 | 6.20 | 1.29 |
| Naturpark Thal | 139.39 | 2883 | 5.08 | 20.68 | 338 | 5.89 | 2.42 |
| Parc du Doubs | 293.65 | 2786 | 4.90 | 9.49 | 305 | 5.31 | 1.04 |
| Parc Ela | 657.82 | 1768 | 3.11 | 2.69 | 239 | 4.16 | 0.36 |
| Parc Jura vaudois | 530.62 | 1715 | 3.02 | 3.23 | 198 | 3.45 | 0.37 |
| Parc naturel périurbain du Jorat | 9.38 | 1529 | 2.69 | 163.01 | 172 | 2.99 | 18.34 |
| Parc naturel régional de la Vallée du Trient | 206.92 | 1314 | 2.31 | 6.35 | 129 | 2.25 | 0.62 |
| Parc naturel régional Gruyère Pays-d’Enhaut | 632.10 | 1290 | 2.27 | 2.04 | 120 | 2.09 | 0.19 |
| Parc régional Chasseral | 473.30 | 1140 | 2.01 | 2.41 | 107 | 1.86 | 0.23 |
| Parco Val Calanca | 120.49 | 759 | 1.34 | 6.30 | 86 | 1.50 | 0.71 |
| Regionaler Naturpark Schaffhausen | 213.03 | 587 | 1.03 | 2.76 | 71 | 1.24 | 0.33 |
| Schweizerischer Nationalpark | 170.33 | 364 | 0.64 | 2.14 | 51 | 0.89 | 0.30 |
| UNESCO Biosphäre Entlebuch | 394.49 | 326 | 0.57 | 0.83 | 46 | 0.80 | 0.12 |
| Wildnispark Zürich Sihlwald | 10.95 | 162 | 0.29 | 14.79 | 45 | 0.78 | 4.11 |
#write.csv(general,"~/GitHub/Flickr_SwissParks/Results/General.csv", row.names = FALSE)
#Pictures taken by user
contrib <- ddbb %>%
group_by(PARK, USER)%>%
summarise(photo = n())
summary(contrib$photo)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 9.891 6.000 1629.000
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Picture_Park_Taken.png")
ggplot(contrib, aes(x=log(photo))) +
geom_histogram(aes(y=(..count..)/sum(..count..)),
binwidth = .1,
colour="black", fill="white")+
facet_wrap(vars(PARK), labeller = label_wrap_gen(width=24))+
theme_bw()+
theme(axis.text.x = element_text(colour = "grey20", size = 6, angle = 90, hjust = 0.5, vjust = 0.5),
axis.text.y = element_text(colour = "grey20", size = 6),
strip.text = element_text(face = "italic"),
text = element_text(size = 10))+
ggtitle('Distribution of percentage of pictures taken by users in the Parks')
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Trend_Pictures.png")
x <-contrib$photo
y <- x + rnorm(2500, 0, 50)
## Warning in x + rnorm(2500, 0, 50): longer object length is not a multiple of
## shorter object length
qqplot(x, y)
ddbb$DATE <- as.Date(ddbb$DATE)
ddbb$YEAR <- format(ddbb$DATE, format="%Y")
#Calculation of statistics per year
year_photo <- ddbb %>%
group_by(PARK, YEAR, USER)%>%
summarise(photo_yearly = n())
yearly_counts <- year_photo %>%
group_by(PARK, YEAR)%>%
summarise(VISITS = n())
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/general_trend.png")
ggplot(data = yearly_counts, aes(x = YEAR, y = VISITS, group = 1)) +
geom_line() +
facet_wrap(vars(PARK), labeller = label_wrap_gen(width=24))+
theme_bw()+
theme(axis.text.x = element_text(colour = "grey20", size = 6, angle = 90, hjust = 0.5, vjust = 0.5),
axis.text.y = element_text(colour = "grey20", size = 6),
strip.text = element_text(face = "italic"),
text = element_text(size = 10))+
ggtitle('Annual distribution of users per Park')
rm(year_photo)
Number of users per season in each Swiss National Parks
#create dates variable for your column that contains dates
dates <- ddbb$DATE
#get the month of the date, create new column called month
ddbb$month<-(month(dates, label=TRUE))
ddbb$SEASON <- ifelse(ddbb$month %in% c('May','Jun','Jul'), "Summer",
ifelse (ddbb$month %in% c('Aug','Sep','Oct'), "Autumn",
ifelse (ddbb$month %in% c('Nove','Dec','Jan'),
"Winter", "Spring")))
#Calculation of pictures per user in seasons
season <- ddbb %>%
group_by(PARK, SEASON, USER)%>%
summarise(picture_season = n())
#From the previous table, we count the number of users per park in each season
f <- c("Summer", "Spring", "Winter", "Autumn")
color_list <- c("#ffbf00", "#00b04f","#00b0f0", "#ed7c31")
col_order <- c("PARK", "Autumn", "Winter", "Spring", "Summer")
season_users <- season %>%
group_by(PARK, SEASON) %>%
summarise(season_users = n())%>%
mutate(SEASON = factor(SEASON, levels = f))%>%
arrange(SEASON)
season_park<- pivot_wider(season_users, names_from = SEASON, values_from = season_users)
season_park <- season_park[, col_order]
season_park <- season_park %>% arrange(PARK)
#write.csv(season_park,"//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Season_Park.csv", row.names = FALSE)
#Display of the table with a better format
kbl(season_park)%>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| PARK | Autumn | Winter | Spring | Summer |
|---|---|---|---|---|
| Biosfera Val Müstair | 97 | 20 | 21 | 73 |
| Jurapark Aargau | 91 | 48 | 97 | 100 |
| Landschaftspark Binntal | 51 | 17 | 16 | 35 |
| Naturpark Beverin | 144 | 52 | 67 | 141 |
| Naturpark Diemtigtal | 31 | 16 | 22 | 20 |
| Naturpark Gantrisch | 146 | 102 | 138 | 124 |
| Naturpark Pfyn-Finges | 140 | 55 | 91 | 115 |
| Naturpark Thal | 53 | 28 | 49 | 43 |
| Parc du Doubs | 120 | 54 | 96 | 119 |
| Parc Ela | 271 | 121 | 158 | 215 |
| Parc Jura vaudois | 216 | 156 | 199 | 193 |
| Parc naturel périurbain du Jorat | 14 | 12 | 20 | 13 |
| Parc naturel régional de la Vallée du Trient | 213 | 44 | 85 | 178 |
| Parc naturel régional Gruyère Pays-d’Enhaut | 458 | 276 | 281 | 418 |
| Parc régional Chasseral | 144 | 94 | 122 | 138 |
| Parco Val Calanca | 21 | 3 | 15 | 17 |
| Regionaler Naturpark Schaffhausen | 39 | 20 | 25 | 34 |
| Schweizerischer Nationalpark | 72 | 11 | 9 | 35 |
| UNESCO Biosphäre Entlebuch | 63 | 36 | 52 | 53 |
| Wildnispark Zürich Sihlwald | 12 | 11 | 23 | 16 |
#Stacked normalized horizontal bar graph
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/season_park.png")
ggplot(season_users, #my data
aes(x = PARK, y = season_users, fill = SEASON, group = SEASON)) +
geom_bar(position = "fill", #Creates stacked bars with 100% of proportion
stat="identity",
color='white',
width = 0.8)+
scale_y_continuous(labels = scales::percent)+ # Change the name of the labels into percentages
scale_fill_manual(values=color_list)+
guides(fill = guide_legend(reverse = TRUE))+ #Change the order of the names in the label
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
legend.position = 'bottom',
axis.text.x = element_text(angle = 90, vjust = 0.15, hjust=0.15, size = 9),
axis.text.y = element_text(size = 9),
strip.background = element_rect(fill = "#17252D", color = "#17252D"),
strip.text = element_text(size = rel(1), face = "bold", color = "white", margin = margin(5,0,5,0)))+
scale_x_discrete(labels = function(x) str_wrap(x, width = 50))+
ggtitle("Proportion of visitors per Season")+
labs(x ="Parks", y = "Visitors")+
ggplot2::coord_flip() #We change the axis to fit into horizontal bars
rm(season)
rm(season_users)
Number of visitors per day in each Swiss National Parks
ddbb$DAY <- weekdays(as.Date(ddbb$DATE))
ddbb$DAY <- factor(ddbb$DAY, levels = c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"))
photoUser_day <- ddbb %>%
group_by(PARK, DAY, USER)%>%
summarise(photo_day = n())
#Counting the number of users per park in each season
daily <- photoUser_day %>%
group_by(PARK, DAY) %>%
summarise(visit_day = n())
sd<- pivot_wider(daily, names_from = DAY, values_from = visit_day)
daily_park <- as.data.frame(sd)
daily_park <- daily_park %>% arrange(PARK)
#Display of the table with a better format
kbl(daily_park)%>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left")
| PARK | Monday | Tuesday | Wednesday | Thursday | Friday | Saturday | Sunday |
|---|---|---|---|---|---|---|---|
| Biosfera Val Müstair | 35 | 37 | 35 | 31 | 33 | 61 | 43 |
| Jurapark Aargau | 55 | 40 | 50 | 42 | 52 | 66 | 86 |
| Landschaftspark Binntal | 23 | 21 | 17 | 17 | 24 | 25 | 33 |
| Naturpark Beverin | 54 | 49 | 49 | 66 | 63 | 102 | 109 |
| Naturpark Diemtigtal | 10 | 9 | 12 | 10 | 16 | 28 | 27 |
| Naturpark Gantrisch | 57 | 63 | 68 | 73 | 81 | 123 | 139 |
| Naturpark Pfyn-Finges | 65 | 60 | 47 | 55 | 77 | 90 | 109 |
| Naturpark Thal | 23 | 16 | 20 | 26 | 16 | 39 | 54 |
| Parc du Doubs | 41 | 38 | 47 | 42 | 59 | 100 | 110 |
| Parc Ela | 120 | 127 | 115 | 119 | 137 | 174 | 184 |
| Parc Jura vaudois | 88 | 88 | 89 | 84 | 126 | 199 | 222 |
| Parc naturel périurbain du Jorat | 10 | 6 | 6 | 2 | 10 | 8 | 18 |
| Parc naturel régional de la Vallée du Trient | 73 | 75 | 67 | 79 | 77 | 115 | 131 |
| Parc naturel régional Gruyère Pays-d’Enhaut | 182 | 160 | 183 | 187 | 218 | 390 | 376 |
| Parc régional Chasseral | 65 | 64 | 62 | 60 | 85 | 113 | 151 |
| Parco Val Calanca | 10 | 10 | 9 | 7 | 9 | 21 | 12 |
| Regionaler Naturpark Schaffhausen | 14 | 12 | 19 | 9 | 15 | 33 | 33 |
| Schweizerischer Nationalpark | 18 | 20 | 17 | 21 | 18 | 26 | 23 |
| UNESCO Biosphäre Entlebuch | 38 | 24 | 27 | 34 | 35 | 46 | 56 |
| Wildnispark Zürich Sihlwald | 6 | 2 | 11 | 4 | 3 | 14 | 23 |
#write.csv(daily_park,"~/GitHub/Flickr_SwissParks/Results/Day-Visits_Park.csv", row.names = FALSE)
Bar plot of visitors per day in the Parks
#Horizontal bar chart of daily number of visitors per Park
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park.png") #Export the graph as *.png
ggplot(daily, #my data
aes(x = PARK, y = visit_day, fill = DAY, group = DAY)) +
geom_bar(position = position_dodge(),
stat="identity",
color='white',
width = 0.8)+
scale_fill_manual(values=c("#2980b9", "#5dade2", "#76d7c4","#2ecc71", "#27ae60", "#f39c12","#d35400"))+
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
legend.position = 'bottom',
axis.text.x = element_text(angle = 90, vjust = 0.15, hjust=0.15, size = 9),
axis.text.y = element_text(size = 9),
strip.background = element_rect(fill = "#17252D", color = "#17252D"),
strip.text = element_text(size = rel(0.5), face = "bold", color = "white", margin = margin(5,0,5,0)))+
scale_x_discrete(labels = function(x) str_wrap(x, width = 24))+
xlab('Swiss National Parks') + ylab('Number of visitors') +
ggtitle('Daily number of visitors')
Mosaic plot of visitors per day in the Parks (Two test)
#GRAPH OF MOSAIC PLOT OF PARKS AND DAYS
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park_Mosaic1.png")
ggplot(data = photoUser_day) +
geom_mosaic(aes(x=product(DAY, PARK), fill = DAY), offset = 0.02) +
scale_fill_manual(values=c("#27ae60","#2ecc71","#76d7c4", "#5dade2", "#2980b9", "#f39c12","#d35400"))+
labs(y = "DAYS", title=" Mosaic plot of visits per Day in Parks ") +
theme(legend.position = "none",
axis.text.y=element_text(size = 10),
axis.ticks.y=element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 10)
)
## Warning: `unite_()` was deprecated in tidyr 1.2.0.
## Please use `unite()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
#POINT GRAPH OF FREQUENCY OF VISIT IN PARKS PER DAY
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park_Mosaic2.png")
ggplot(daily, aes(x = PARK, y = visit_day, color = DAY)) +
geom_point() +
labs(x = "Days", y = "Visits") +
scale_color_manual(name = "Days:", values=c("#27ae60","#2ecc71","#76d7c4", "#5dade2", "#2980b9", "#f39c12","#d35400")) +
theme(legend.title = element_text(size = 14, face = 2),
legend.position = 'bottom',
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8))+
ggtitle('Daily total number of visitors in Parks')
Analysis of users according the canton origin, visited park, and time span between the first and the last picture
canton<- ddbb %>%
group_by(CANTON, USER)%>%
summarise(canton_user = n())
co<- pivot_wider(canton, names_from = CANTON, values_from = canton_user)
canton_users <- as.data.frame(co)
canton_users[is.na(canton_users)] = 0
canton_users <- canton_users %>% mutate_if(is.numeric, ~1 * (. > 0))
canton_users$Total_Canton = rowSums(canton_users[,c(2:13)])
canton_users <- canton_users[order(canton_users$Total_Canton, decreasing = FALSE),]
#write.csv(canton_users,"~/GitHub/Flickr_SwissParks/Results/Canton_Users.csv", row.names = FALSE)
ggplot(canton_users, aes(x=Total_Canton))+
geom_histogram(binwidth=1 ,colour="black", fill="white")+
geom_vline(aes(xintercept=mean(Total_Canton)), color="blue", linetype="dashed", size=1)+
labs(title="Histogram of Cantons by users ",x="Number of visited Cantons", y = "Count of users")+
scale_x_continuous(breaks = seq(0, 12, 1))+
scale_y_continuous(breaks = seq(0, 6000, 200))
#Heatmap
coul <- colorRampPalette(c("beige", "green"))(5)
rownames(canton_users) <- canton_users[,1]
canton_users[,1]<- NULL
canton_users[,13]<- NULL
data <-as.matrix(canton_users)
heatmap(data, Colv = NA, Rowv = NA, scale = "column", col = coul, main = "Heatmap of User per Canton")
rm(canton)
rm(co)
rm(coul)
park <- ddbb %>%
group_by(PARK, USER)%>%
summarise(park_user = n())
po<- pivot_wider(park, names_from = PARK, values_from = park_user)
park_users <- as.data.frame(po)
park_users[is.na(park_users)] = 0
park_users <- park_users %>% mutate_if(is.numeric, ~1 * (. > 0))
park_users$Total_Park = rowSums(park_users[,c(2:21)])
park_users <- park_users[order(park_users$Total_Park, decreasing = FALSE),]
#write.csv(park_users,"~/GitHub/Flickr_SwissParks/Results/Park_Users.csv", row.names = FALSE)
ggplot(park_users, aes(x=Total_Park))+
geom_histogram(binwidth=1 ,colour="black", fill="white")+
geom_vline(aes(xintercept=mean(Total_Park)), color="blue", linetype="dashed", size=1)+
labs(title="Histogram of Parks by users ",x="Number of visited Parks", y = "Count of users")+
scale_x_continuous(breaks = seq(0, 20, 1))+
scale_y_continuous(breaks = seq(0, 6000, 200))
#Heatmap
coul <- colorRampPalette(c("beige", "blue"))(10)
rownames(park_users) <- park_users[,1]
park_users[,1]<- NULL
park_users[,21]<- NULL
park_users <-as.matrix(park_users)
heatmap(park_users, Colv = NA, Rowv = NA, scale = "column", col = coul, main = "Heatmap of User per Park")
rm(park)
rm(po)
rm(coul)
user <- ddbb %>%
group_by(USER)%>%
summarise(photos = n())
user <- as.data.frame(user)
dt <- ddbb %>%
group_by(USER, DATE)%>%
summarise(photos = n())%>%
mutate(DATE=as.Date(DATE))
dt1 <- dt %>%
group_by(USER) %>%
arrange(DATE) %>%
slice(1L)
first <- as.data.frame(dt1)
names(first)[names(first)=="DATE"] <- "start"
dt2 <- dt %>%
group_by(USER) %>%
arrange(desc(DATE)) %>%
slice(1L)
last <- as.data.frame(dt)
names(last)[names(last)=="DATE"] <- "end"
dt_final <- merge(last, first, by="USER")
dur <- dt_final %>%
mutate(
days = end - start,
seconds = as.numeric(difftime(end, start)) / 365.25,
years = round(interval(start, end) / years(1))
)
time_user <- merge(user, dur, by="USER", all=T)
time_user <- subset(time_user, select=-c(photos.x,photos.y))
time_user <- time_user[order(time_user$seconds, decreasing = TRUE),]
ggplot(time_user, aes(x=years))+
geom_histogram(binwidth=1 ,colour="black", fill="white")+
geom_vline(aes(xintercept=mean(years)), color="blue", linetype="dashed", size=1)+
labs(title="Histogram of time between first and last picture by users ",x="time span in years", y = "Count of users")+
scale_x_continuous(breaks = seq(0, 20, 1))+
scale_y_continuous(breaks = seq(0, 6000, 500))
#write.csv(time_user,"~/GitHub/Flickr_SwissParks/Results/Time_Users.csv", row.names = FALSE)
rm(user)
rm(dt)
rm(dt1)
rm(dt2)
rm(last)
rm(dur)
set.seed(42)
base <- data.frame(ddbb$photo_id, ddbb$USER, ddbb$PARK, ddbb$TAGS)
names(base)[names(base) == "ddbb.PARK"] <- "doc_id"
names(base)[names(base) == "ddbb.TAGS"] <- "text"
names(base)[names(base) == "ddbb.USER"] <- "author"
names(base)[names(base) == "ddbb.photo_id"] <- "origin"
base <- base %>%
mutate(text = strsplit(as.character(text), ",")) %>%
unnest(text)
base <- base[!duplicated(base),]
base$text <- gsub("[[:digit:]]", " ", base$text) # Remove numbers
base$text <- gsub("[][!#$%()*,.:;<=>@^_`|~.{}„¨“”´´°ã]", " ", base$text) # Remove extra symbols
base$text <- gsub("[[:punct:]]", " ", base$text, perl=TRUE) #remove punctuation
base$text <- gsub('\\b\\w{1,2}\\s','',base$text) #Remove words with less than 2 characters
base$text <- gsub('\\b\\w{6,}\\s','',base$text) #Remove words with more than 7 characters
base$text <- gsub("\\s+", " ", str_trim(base$text)) # Remove extra whitespaces
base_source=DataframeSource(base)
base_corpus=VCorpus(base_source)
#base_corpus_clean <- tm_map(base_corpus, removeWords, stopwords("english"))
#base_corpus_clean <- tm_map(base_corpus,stemDocument) #find to root of the words
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Parks.png")
wordcloud(base_corpus,
min.freq = 5,
max.words = 50,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#myStopwords <- setdiff(myStopwords, c("d", "e"))
#text_corpus_clean <- tm_map(base_corpus_clean, removeWords, myStopwords)
#png(//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Nationalpark.png")
sub_jurapark <- base%>%filter(doc_id=="Schweizerischer Nationalpark")
sub_jurapark=DataframeSource(sub_jurapark)
sub_jurapark=VCorpus(sub_jurapark)
sub_jurapark <- tm_map(sub_jurapark, removeWords, stopwords("german"))
#sub_jurapark <- tm_map(sub_jurapark,stemDocument) #find to root of the words
jurapark <- wordcloud(sub_jurapark,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Jurapark.png")
sub_jurapark <- base%>%filter(doc_id=="Jurapark Aargau")
sub_jurapark=DataframeSource(sub_jurapark)
sub_jurapark=VCorpus(sub_jurapark)
sub_jurapark <- tm_map(sub_jurapark, removeWords, stopwords("german"))
#sub_jurapark <- tm_map(sub_jurapark,stemDocument) #find to root of the words
jurapark <- wordcloud(sub_jurapark,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Gruyere.png")
sub_gruyere <- base%>%filter(doc_id=="Parc naturel régional Gruyère Pays-d’Enhaut")
sub_gruyere=DataframeSource(sub_gruyere)
sub_gruyere=VCorpus(sub_gruyere)
sub_gruyere <- tm_map(sub_gruyere, removeWords, stopwords("french"))
#sub_gruyere <- tm_map(sub_gruyere,stemDocument) #find to root of the words
gruyere <- wordcloud(sub_gruyere,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
## Warning in wordcloud(sub_gruyere, min.freq = 5, max.words = 100, random.order =
## FALSE, : wwwbernergartenphilosophcomcheuferuegenchschweizorganicspiriteu could
## not be fit on page. It will not be plotted.
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Mustair.png")
sub_mustair <- base%>%filter(doc_id=="Biosfera Val Müstair")
sub_mustair=DataframeSource(sub_mustair)
sub_mustair=VCorpus(sub_mustair)
sub_mustair <- tm_map(sub_mustair, removeWords, stopwords("german"))
#sub_mustair <- tm_map(sub_mustair,stemDocument) #find to root of the words
mustair <- wordcloud(sub_mustair,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Sihlwald.png")
sub_sihlwald <- base%>%filter(doc_id=="Wildnispark Zürich Sihlwald")
sub_sihlwald=DataframeSource(sub_sihlwald)
sub_sihlwald=VCorpus(sub_sihlwald)
sub_sihlwald <- tm_map(sub_sihlwald, removeWords, stopwords("german"))
#sub_sihlwald <- tm_map(sub_sihlwald,stemDocument) #find to root of the words
sihlwald <- wordcloud(sub_sihlwald,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Chasseral.png")
sub_chasseral <- base%>%filter(doc_id=="Parc régional Chasseral")
sub_chasseral=DataframeSource(sub_chasseral)
sub_chasseral=VCorpus(sub_chasseral)
sub_chasseral <- tm_map(sub_chasseral, removeWords, stopwords("french"))
#sub_chasseral <- tm_map(sub_chasseral,stemDocument) #find to root of the words
chasseral <- wordcloud(sub_chasseral,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Vaudois.png")
sub_vaudois <- base%>%filter(doc_id=="Parc Jura vaudois")
sub_vaudois=DataframeSource(sub_vaudois)
sub_vaudois=VCorpus(sub_vaudois)
sub_vaudois <- tm_map(sub_vaudois, removeWords, stopwords("french"))
#sub_vaudois <- tm_map(sub_vaudois,stemDocument) #find to root of the words
vaudois <- wordcloud(sub_vaudois,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Ela.png")
sub_ela <- base%>%filter(doc_id=="Parc Ela")
sub_ela=DataframeSource(sub_ela)
sub_ela=VCorpus(sub_ela)
sub_ela <- tm_map(sub_ela, removeWords, stopwords("german"))
#sub_ela <- tm_map(sub_ela,stemDocument) #find to root of the words
ela <- wordcloud(sub_ela,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Gantrisch.png")
sub_gantrisch <- base%>%filter(doc_id=="Naturpark Gantrisch")
sub_gantrisch=DataframeSource(sub_gantrisch)
sub_gantrisch=VCorpus(sub_gantrisch)
sub_gantrisch <- tm_map(sub_gantrisch, removeWords, stopwords("german"))
#sub_gantrisch <- tm_map(sub_gantrisch,stemDocument) #find to root of the words
gantrisch <- wordcloud(sub_gantrisch,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Beverin.png")
sub_beverin <- base%>%filter(doc_id=="Naturpark Beverin")
sub_beverin=DataframeSource(sub_beverin)
sub_beverin=VCorpus(sub_beverin)
sub_beverin <- tm_map(sub_beverin, removeWords, stopwords("german"))
#sub_beverin <- tm_map(sub_beverin,stemDocument) #find to root of the words
beverin <- wordcloud(sub_beverin,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Schaffhausen.png")
sub_schaffhausen <- base%>%filter(doc_id=="Regionaler Naturpark Schaffhausen")
sub_schaffhausen=DataframeSource(sub_schaffhausen)
sub_schaffhausen=VCorpus(sub_schaffhausen)
sub_schaffhausen <- tm_map(sub_schaffhausen, removeWords, stopwords("german"))
#sub_schaffhausen <- tm_map(sub_schaffhausen,stemDocument) #find to root of the words
schaffhausen <- wordcloud(sub_schaffhausen,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Trient.png")
sub_trient <- base%>%filter(doc_id=="Parc naturel régional de la Vallée du Trient")
sub_trient=DataframeSource(sub_trient)
sub_trient=VCorpus(sub_trient)
sub_trient <- tm_map(sub_trient, removeWords, stopwords("french"))
#sub_trient <- tm_map(sub_trient,stemDocument) #find to root of the words
trient <- wordcloud(sub_trient,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Doubs.png")
sub_doubs <- base%>%filter(doc_id=="Parc du Doubs")
sub_doubs=DataframeSource(sub_doubs)
sub_doubs=VCorpus(sub_doubs)
sub_doubs <- tm_map(sub_doubs, removeWords, stopwords("french"))
#sub_doubs <- tm_map(sub_doubs,stemDocument) #find to root of the words
doubs <- wordcloud(sub_doubs,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Thal.png")
sub_thal <- base%>%filter(doc_id=="Naturpark Thal")
sub_thal=DataframeSource(sub_thal)
sub_thal=VCorpus(sub_thal)
sub_thal <- tm_map(sub_thal, removeWords, stopwords("german"))
#sub_thal <- tm_map(sub_thal,stemDocument) #find to root of the words
thal <- wordcloud(sub_thal,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Pfyn-Finges.png")
sub_pfyn_finges <- base%>%filter(doc_id=="Naturpark Pfyn-Finges")
sub_pfyn_finges=DataframeSource(sub_pfyn_finges)
sub_pfyn_finges=VCorpus(sub_pfyn_finges)
sub_pfyn_finges <- tm_map(sub_pfyn_finges, removeWords, stopwords("german"))
#sub_pfyn_finges <- tm_map(sub_pfyn_finges,stemDocument) #find to root of the words
pfyn_finges <- wordcloud(sub_pfyn_finges,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Diemigtal.png")
sub_diemtigtal <- base%>%filter(doc_id=="Naturpark Diemtigtal")
sub_diemtigtal=DataframeSource(sub_diemtigtal)
sub_diemtigtal=VCorpus(sub_diemtigtal)
sub_diemtigtal <- tm_map(sub_diemtigtal, removeWords, stopwords("german"))
#sub_diemtigtal <- tm_map(sub_diemtigtal,stemDocument) #find to root of the words
diemtigtal <- wordcloud(sub_diemtigtal,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Entlebuch.png")
sub_entlebuch <- base%>%filter(doc_id=="UNESCO Biosphäre Entlebuch")
sub_entlebuch=DataframeSource(sub_entlebuch)
sub_entlebuch=VCorpus(sub_entlebuch)
sub_entlebuch <- tm_map(sub_entlebuch, removeWords, stopwords("german"))
#sub_entlebuch <- tm_map(sub_entlebuch,stemDocument) #find to root of the words
entlebuch <- wordcloud(sub_entlebuch,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Calanca.png")
sub_calanca<- base%>%filter(doc_id=="Parco Val Calanca")
sub_calanca=DataframeSource(sub_calanca)
sub_calanca=VCorpus(sub_calanca)
sub_calanca <- tm_map(sub_calanca, removeWords, stopwords("italian"))
#sub_calanca <- tm_map(sub_calanca,stemDocument) #find to root of the words
calanca <- wordcloud(sub_calanca,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Binntal.png")
sub_binntal<- base%>%filter(doc_id=="Landschaftspark Binntal")
sub_binntal=DataframeSource(sub_binntal)
sub_binntal=VCorpus(sub_binntal)
sub_binntal <- tm_map(sub_binntal, removeWords, stopwords("french"))
#sub_binntal <- tm_map(sub_binntal,stemDocument) #find to root of the words
binntal <- wordcloud(sub_binntal,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Jorat.png")
sub_jorat<- base%>%filter(doc_id=="Parc naturel périurbain du Jorat")
sub_jorat=DataframeSource(sub_jorat)
sub_jorat=VCorpus(sub_jorat)
sub_jorat <- tm_map(sub_jorat, removeWords, stopwords("french"))
#sub_jorat <- tm_map(sub_jorat,stemDocument) #find to root of the words
jorat <- wordcloud(sub_jorat,
min.freq = 5,
max.words = 100,
random.order = FALSE,
random.color = FALSE,
colors = brewer.pal(8, "Dark2"))
###6.1 Matrix with the terms frequencies by park
#Creation of the matrix of terms (words) by Park (Document)
dtm <- DocumentTermMatrix(base_corpus, control= list(wordLengths=c(3, 8))) #erase words with less than 3 character and over 8 characters
head(sort(slam::col_sums(dtm), decreasing = TRUE), n=20) #visualize the most 20 frequent words
## swiss alps jura snow vaud suiza schweiz alpen nikon geo
## 3107 2945 2907 2774 2452 2278 2011 1903 1754 1657
## ride sony sky train mmf neige mgn car river bern
## 1639 1512 1463 1436 1328 1267 1217 1066 1040 1016
#Distribution of terms in the total contentof the corpora
Heaps_plot(dtm, main="Heap' Law distribution of words") #Frequencies distribution of the number of distinct words"
## (Intercept) x
## 0.1670713 0.7092524
Zipf_plot(dtm, main="Zipf' Law distribution of words") # inverse relation of the rank (frequencies) distribution of words
## (Intercept) x
## 13.13721 -1.56131
###6.2 Constrution of the contingency table
#transform of the matrix to split the words by Park in a matrix
dtm_df <- reshape2::melt(as.matrix(dtm))
dtm_df <- dtm_df[dtm_df$value > 0, ] #Filtering to avoid noise, for keeping row (words) than have frequency >= 1
head(dtm_df,10)
## Docs Terms value
## 9353 Parc Jura vaudois aabd 1
## 86203 Parc Ela aad 1
## 107055 Parc naturel régional Gruyère Pays-d’Enhaut aae 1
## 133172 Jurapark Aargau aar 1
## 133173 Jurapark Aargau aar 1
## 150896 Naturpark Gantrisch aar 1
## 157044 Naturpark Gantrisch aar 1
## 175635 Jurapark Aargau aar 1
## 177116 Jurapark Aargau aarau 1
## 178076 Jurapark Aargau aarau 1
#Redraw data and assemble it into a table to present in row the Parks and the words as columns, with frequencies in the values
contingency<- xtabs(~ Docs + Terms, data = dtm_df) #The occurrences is not considering the frequencies (Value) of the terms
contingency_matrix <- as.matrix(contingency)
###6.3 Constrution of the covariance matrix for the correlation between Parks
#Transformation of the matrix of frequencies into a square matrix per Park
#If both parks share a word once or several times, the correlation will be one,
#If both parks don't share the word the correlation will be zero
parks_congruency <- contingency_matrix %*% t(contingency_matrix) # square matrix
diag(parks_congruency) <- 0 #the relation between themselves is zero
parks_congruency_matrix <- as.data.frame.matrix(parks_congruency) #we transform into a matrix to melted into a edgelist
parks_congruency_matrix
## Jurapark Aargau
## Jurapark Aargau 0
## Parc naturel régional Gruyère Pays-d’Enhaut 326989
## Parc Jura vaudois 288978
## Schweizerischer Nationalpark 18934
## Parc Ela 253551
## Naturpark Gantrisch 193419
## Parc naturel régional de la Vallée du Trient 59763
## Naturpark Pfyn-Finges 95073
## Landschaftspark Binntal 28349
## Naturpark Thal 76947
## Parc régional Chasseral 262261
## UNESCO Biosphäre Entlebuch 83924
## Biosfera Val Müstair 30395
## Parc du Doubs 150748
## Parco Val Calanca 4620
## Naturpark Beverin 218533
## Regionaler Naturpark Schaffhausen 41397
## Naturpark Diemtigtal 25110
## Parc naturel périurbain du Jorat 1760
## Wildnispark Zürich Sihlwald 23972
## Parc naturel régional Gruyère Pays-d’Enhaut
## Jurapark Aargau 326989
## Parc naturel régional Gruyère Pays-d’Enhaut 0
## Parc Jura vaudois 2420075
## Schweizerischer Nationalpark 162008
## Parc Ela 1271810
## Naturpark Gantrisch 1473862
## Parc naturel régional de la Vallée du Trient 682515
## Naturpark Pfyn-Finges 643884
## Landschaftspark Binntal 308856
## Naturpark Thal 495715
## Parc régional Chasseral 1495359
## UNESCO Biosphäre Entlebuch 515567
## Biosfera Val Müstair 255370
## Parc du Doubs 458241
## Parco Val Calanca 41065
## Naturpark Beverin 762728
## Regionaler Naturpark Schaffhausen 76972
## Naturpark Diemtigtal 273170
## Parc naturel périurbain du Jorat 48872
## Wildnispark Zürich Sihlwald 65943
## Parc Jura vaudois
## Jurapark Aargau 288978
## Parc naturel régional Gruyère Pays-d’Enhaut 2420075
## Parc Jura vaudois 0
## Schweizerischer Nationalpark 39417
## Parc Ela 444544
## Naturpark Gantrisch 839060
## Parc naturel régional de la Vallée du Trient 227796
## Naturpark Pfyn-Finges 234419
## Landschaftspark Binntal 90259
## Naturpark Thal 392078
## Parc régional Chasseral 1642520
## UNESCO Biosphäre Entlebuch 197003
## Biosfera Val Müstair 63518
## Parc du Doubs 1416625
## Parco Val Calanca 18907
## Naturpark Beverin 285674
## Regionaler Naturpark Schaffhausen 21024
## Naturpark Diemtigtal 95135
## Parc naturel périurbain du Jorat 36932
## Wildnispark Zürich Sihlwald 16369
## Schweizerischer Nationalpark
## Jurapark Aargau 18934
## Parc naturel régional Gruyère Pays-d’Enhaut 162008
## Parc Jura vaudois 39417
## Schweizerischer Nationalpark 0
## Parc Ela 89176
## Naturpark Gantrisch 112550
## Parc naturel régional de la Vallée du Trient 60839
## Naturpark Pfyn-Finges 64007
## Landschaftspark Binntal 27270
## Naturpark Thal 18062
## Parc régional Chasseral 70578
## UNESCO Biosphäre Entlebuch 47229
## Biosfera Val Müstair 35099
## Parc du Doubs 23567
## Parco Val Calanca 3453
## Naturpark Beverin 68016
## Regionaler Naturpark Schaffhausen 2910
## Naturpark Diemtigtal 19143
## Parc naturel périurbain du Jorat 1052
## Wildnispark Zürich Sihlwald 3457
## Parc Ela Naturpark Gantrisch
## Jurapark Aargau 253551 193419
## Parc naturel régional Gruyère Pays-d’Enhaut 1271810 1473862
## Parc Jura vaudois 444544 839060
## Schweizerischer Nationalpark 89176 112550
## Parc Ela 0 777486
## Naturpark Gantrisch 777486 0
## Parc naturel régional de la Vallée du Trient 287668 357617
## Naturpark Pfyn-Finges 390732 442900
## Landschaftspark Binntal 159174 182659
## Naturpark Thal 131943 164252
## Parc régional Chasseral 547948 858200
## UNESCO Biosphäre Entlebuch 625873 436586
## Biosfera Val Müstair 159876 265672
## Parc du Doubs 240144 202148
## Parco Val Calanca 21987 24524
## Naturpark Beverin 1088697 556623
## Regionaler Naturpark Schaffhausen 52305 46886
## Naturpark Diemtigtal 133486 284986
## Parc naturel périurbain du Jorat 7216 7090
## Wildnispark Zürich Sihlwald 85746 45875
## Parc naturel régional de la Vallée du Trient
## Jurapark Aargau 59763
## Parc naturel régional Gruyère Pays-d’Enhaut 682515
## Parc Jura vaudois 227796
## Schweizerischer Nationalpark 60839
## Parc Ela 287668
## Naturpark Gantrisch 357617
## Parc naturel régional de la Vallée du Trient 0
## Naturpark Pfyn-Finges 260496
## Landschaftspark Binntal 102975
## Naturpark Thal 47133
## Parc régional Chasseral 329945
## UNESCO Biosphäre Entlebuch 152317
## Biosfera Val Müstair 96758
## Parc du Doubs 90928
## Parco Val Calanca 12772
## Naturpark Beverin 212721
## Regionaler Naturpark Schaffhausen 10918
## Naturpark Diemtigtal 67323
## Parc naturel périurbain du Jorat 3337
## Wildnispark Zürich Sihlwald 8968
## Naturpark Pfyn-Finges
## Jurapark Aargau 95073
## Parc naturel régional Gruyère Pays-d’Enhaut 643884
## Parc Jura vaudois 234419
## Schweizerischer Nationalpark 64007
## Parc Ela 390732
## Naturpark Gantrisch 442900
## Parc naturel régional de la Vallée du Trient 260496
## Naturpark Pfyn-Finges 0
## Landschaftspark Binntal 112695
## Naturpark Thal 59265
## Parc régional Chasseral 344566
## UNESCO Biosphäre Entlebuch 196762
## Biosfera Val Müstair 132598
## Parc du Doubs 98447
## Parco Val Calanca 13041
## Naturpark Beverin 299442
## Regionaler Naturpark Schaffhausen 21764
## Naturpark Diemtigtal 78609
## Parc naturel périurbain du Jorat 6650
## Wildnispark Zürich Sihlwald 12010
## Landschaftspark Binntal
## Jurapark Aargau 28349
## Parc naturel régional Gruyère Pays-d’Enhaut 308856
## Parc Jura vaudois 90259
## Schweizerischer Nationalpark 27270
## Parc Ela 159174
## Naturpark Gantrisch 182659
## Parc naturel régional de la Vallée du Trient 102975
## Naturpark Pfyn-Finges 112695
## Landschaftspark Binntal 0
## Naturpark Thal 14458
## Parc régional Chasseral 153556
## UNESCO Biosphäre Entlebuch 77915
## Biosfera Val Müstair 59712
## Parc du Doubs 34545
## Parco Val Calanca 5346
## Naturpark Beverin 106925
## Regionaler Naturpark Schaffhausen 3460
## Naturpark Diemtigtal 37848
## Parc naturel périurbain du Jorat 1351
## Wildnispark Zürich Sihlwald 11642
## Naturpark Thal
## Jurapark Aargau 76947
## Parc naturel régional Gruyère Pays-d’Enhaut 495715
## Parc Jura vaudois 392078
## Schweizerischer Nationalpark 18062
## Parc Ela 131943
## Naturpark Gantrisch 164252
## Parc naturel régional de la Vallée du Trient 47133
## Naturpark Pfyn-Finges 59265
## Landschaftspark Binntal 14458
## Naturpark Thal 0
## Parc régional Chasseral 339056
## UNESCO Biosphäre Entlebuch 95224
## Biosfera Val Müstair 17796
## Parc du Doubs 314262
## Parco Val Calanca 2250
## Naturpark Beverin 102866
## Regionaler Naturpark Schaffhausen 12785
## Naturpark Diemtigtal 43534
## Parc naturel périurbain du Jorat 2585
## Wildnispark Zürich Sihlwald 7640
## Parc régional Chasseral
## Jurapark Aargau 262261
## Parc naturel régional Gruyère Pays-d’Enhaut 1495359
## Parc Jura vaudois 1642520
## Schweizerischer Nationalpark 70578
## Parc Ela 547948
## Naturpark Gantrisch 858200
## Parc naturel régional de la Vallée du Trient 329945
## Naturpark Pfyn-Finges 344566
## Landschaftspark Binntal 153556
## Naturpark Thal 339056
## Parc régional Chasseral 0
## UNESCO Biosphäre Entlebuch 257211
## Biosfera Val Müstair 89747
## Parc du Doubs 804965
## Parco Val Calanca 23010
## Naturpark Beverin 373087
## Regionaler Naturpark Schaffhausen 27501
## Naturpark Diemtigtal 223147
## Parc naturel périurbain du Jorat 12679
## Wildnispark Zürich Sihlwald 20236
## UNESCO Biosphäre Entlebuch
## Jurapark Aargau 83924
## Parc naturel régional Gruyère Pays-d’Enhaut 515567
## Parc Jura vaudois 197003
## Schweizerischer Nationalpark 47229
## Parc Ela 625873
## Naturpark Gantrisch 436586
## Parc naturel régional de la Vallée du Trient 152317
## Naturpark Pfyn-Finges 196762
## Landschaftspark Binntal 77915
## Naturpark Thal 95224
## Parc régional Chasseral 257211
## UNESCO Biosphäre Entlebuch 0
## Biosfera Val Müstair 103838
## Parc du Doubs 75043
## Parco Val Calanca 9620
## Naturpark Beverin 545781
## Regionaler Naturpark Schaffhausen 15830
## Naturpark Diemtigtal 70317
## Parc naturel périurbain du Jorat 2484
## Wildnispark Zürich Sihlwald 42712
## Biosfera Val Müstair Parc du Doubs
## Jurapark Aargau 30395 150748
## Parc naturel régional Gruyère Pays-d’Enhaut 255370 458241
## Parc Jura vaudois 63518 1416625
## Schweizerischer Nationalpark 35099 23567
## Parc Ela 159876 240144
## Naturpark Gantrisch 265672 202148
## Parc naturel régional de la Vallée du Trient 96758 90928
## Naturpark Pfyn-Finges 132598 98447
## Landschaftspark Binntal 59712 34545
## Naturpark Thal 17796 314262
## Parc régional Chasseral 89747 804965
## UNESCO Biosphäre Entlebuch 103838 75043
## Biosfera Val Müstair 0 30765
## Parc du Doubs 30765 0
## Parco Val Calanca 4367 12577
## Naturpark Beverin 112859 138885
## Regionaler Naturpark Schaffhausen 7067 19628
## Naturpark Diemtigtal 62897 64488
## Parc naturel périurbain du Jorat 1090 4417
## Wildnispark Zürich Sihlwald 3375 13977
## Parco Val Calanca
## Jurapark Aargau 4620
## Parc naturel régional Gruyère Pays-d’Enhaut 41065
## Parc Jura vaudois 18907
## Schweizerischer Nationalpark 3453
## Parc Ela 21987
## Naturpark Gantrisch 24524
## Parc naturel régional de la Vallée du Trient 12772
## Naturpark Pfyn-Finges 13041
## Landschaftspark Binntal 5346
## Naturpark Thal 2250
## Parc régional Chasseral 23010
## UNESCO Biosphäre Entlebuch 9620
## Biosfera Val Müstair 4367
## Parc du Doubs 12577
## Parco Val Calanca 0
## Naturpark Beverin 18073
## Regionaler Naturpark Schaffhausen 586
## Naturpark Diemtigtal 4167
## Parc naturel périurbain du Jorat 309
## Wildnispark Zürich Sihlwald 1226
## Naturpark Beverin
## Jurapark Aargau 218533
## Parc naturel régional Gruyère Pays-d’Enhaut 762728
## Parc Jura vaudois 285674
## Schweizerischer Nationalpark 68016
## Parc Ela 1088697
## Naturpark Gantrisch 556623
## Parc naturel régional de la Vallée du Trient 212721
## Naturpark Pfyn-Finges 299442
## Landschaftspark Binntal 106925
## Naturpark Thal 102866
## Parc régional Chasseral 373087
## UNESCO Biosphäre Entlebuch 545781
## Biosfera Val Müstair 112859
## Parc du Doubs 138885
## Parco Val Calanca 18073
## Naturpark Beverin 0
## Regionaler Naturpark Schaffhausen 49211
## Naturpark Diemtigtal 79502
## Parc naturel périurbain du Jorat 6150
## Wildnispark Zürich Sihlwald 80323
## Regionaler Naturpark Schaffhausen
## Jurapark Aargau 41397
## Parc naturel régional Gruyère Pays-d’Enhaut 76972
## Parc Jura vaudois 21024
## Schweizerischer Nationalpark 2910
## Parc Ela 52305
## Naturpark Gantrisch 46886
## Parc naturel régional de la Vallée du Trient 10918
## Naturpark Pfyn-Finges 21764
## Landschaftspark Binntal 3460
## Naturpark Thal 12785
## Parc régional Chasseral 27501
## UNESCO Biosphäre Entlebuch 15830
## Biosfera Val Müstair 7067
## Parc du Doubs 19628
## Parco Val Calanca 586
## Naturpark Beverin 49211
## Regionaler Naturpark Schaffhausen 0
## Naturpark Diemtigtal 3945
## Parc naturel périurbain du Jorat 348
## Wildnispark Zürich Sihlwald 4144
## Naturpark Diemtigtal
## Jurapark Aargau 25110
## Parc naturel régional Gruyère Pays-d’Enhaut 273170
## Parc Jura vaudois 95135
## Schweizerischer Nationalpark 19143
## Parc Ela 133486
## Naturpark Gantrisch 284986
## Parc naturel régional de la Vallée du Trient 67323
## Naturpark Pfyn-Finges 78609
## Landschaftspark Binntal 37848
## Naturpark Thal 43534
## Parc régional Chasseral 223147
## UNESCO Biosphäre Entlebuch 70317
## Biosfera Val Müstair 62897
## Parc du Doubs 64488
## Parco Val Calanca 4167
## Naturpark Beverin 79502
## Regionaler Naturpark Schaffhausen 3945
## Naturpark Diemtigtal 0
## Parc naturel périurbain du Jorat 1055
## Wildnispark Zürich Sihlwald 5295
## Parc naturel périurbain du Jorat
## Jurapark Aargau 1760
## Parc naturel régional Gruyère Pays-d’Enhaut 48872
## Parc Jura vaudois 36932
## Schweizerischer Nationalpark 1052
## Parc Ela 7216
## Naturpark Gantrisch 7090
## Parc naturel régional de la Vallée du Trient 3337
## Naturpark Pfyn-Finges 6650
## Landschaftspark Binntal 1351
## Naturpark Thal 2585
## Parc régional Chasseral 12679
## UNESCO Biosphäre Entlebuch 2484
## Biosfera Val Müstair 1090
## Parc du Doubs 4417
## Parco Val Calanca 309
## Naturpark Beverin 6150
## Regionaler Naturpark Schaffhausen 348
## Naturpark Diemtigtal 1055
## Parc naturel périurbain du Jorat 0
## Wildnispark Zürich Sihlwald 292
## Wildnispark Zürich Sihlwald
## Jurapark Aargau 23972
## Parc naturel régional Gruyère Pays-d’Enhaut 65943
## Parc Jura vaudois 16369
## Schweizerischer Nationalpark 3457
## Parc Ela 85746
## Naturpark Gantrisch 45875
## Parc naturel régional de la Vallée du Trient 8968
## Naturpark Pfyn-Finges 12010
## Landschaftspark Binntal 11642
## Naturpark Thal 7640
## Parc régional Chasseral 20236
## UNESCO Biosphäre Entlebuch 42712
## Biosfera Val Müstair 3375
## Parc du Doubs 13977
## Parco Val Calanca 1226
## Naturpark Beverin 80323
## Regionaler Naturpark Schaffhausen 4144
## Naturpark Diemtigtal 5295
## Parc naturel périurbain du Jorat 292
## Wildnispark Zürich Sihlwald 0
melted_congruency <- melt(parks_congruency, value.name = "Coocurrences", varnames=c('Park1', 'Park2')) #Edgelist of parks
head(melted_congruency,10)
## Park1 Park2 Coocurrences
## 1 Jurapark Aargau Jurapark Aargau 0
## 2 Parc naturel régional Gruyère Pays-d’Enhaut Jurapark Aargau 326989
## 3 Parc Jura vaudois Jurapark Aargau 288978
## 4 Schweizerischer Nationalpark Jurapark Aargau 18934
## 5 Parc Ela Jurapark Aargau 253551
## 6 Naturpark Gantrisch Jurapark Aargau 193419
## 7 Parc naturel régional de la Vallée du Trient Jurapark Aargau 59763
## 8 Naturpark Pfyn-Finges Jurapark Aargau 95073
## 9 Landschaftspark Binntal Jurapark Aargau 28349
## 10 Naturpark Thal Jurapark Aargau 76947
base_size <- 9 #size of the labels for the ggplot
#Heatmap of the coocurrences between Parks
ggplot(melted_congruency, aes(x=Park1, y=Park2, fill= Coocurrences)) +
geom_tile(color = "black") +
scale_fill_gradient(low = "white", high = "red") +
coord_fixed()+
theme_grey(base_size = base_size) +
labs(x = "", y = "") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) +
theme(legend.position = "right",
axis.text.x = element_text(size = base_size * 0.8,
angle = 90,
hjust = 0, colour ="grey50"))
###6.4 Fitting the model
#To interpret correspondence analysis, the first step is to evaluate whether there is a significant dependency between the rows and columns.
#A rigorous method is to use the chi-square statistic for examining the association between row and column variables
#Fitting the data avoiding the null values, and with a correlation reduce the extreme values
fit <- princomp(na.omit(parks_congruency), cor = TRUE)
fit
## Call:
## princomp(x = na.omit(parks_congruency), cor = TRUE)
##
## Standard deviations:
## Comp.1 Comp.2 Comp.3 Comp.4
## 3.74908461577732 1.43925173377828 1.09942224719448 0.79698115535258
## Comp.5 Comp.6 Comp.7 Comp.8
## 0.76480103005762 0.67632060954672 0.60027421303011 0.42853294360283
## Comp.9 Comp.10 Comp.11 Comp.12
## 0.36927184288334 0.35537833705394 0.28318233527726 0.18560299783339
## Comp.13 Comp.14 Comp.15 Comp.16
## 0.17482937686407 0.14724592470794 0.08885308870610 0.05274692466407
## Comp.17 Comp.18 Comp.19 Comp.20
## 0.04361403623870 0.02402013785265 0.00344321145989 0.00000002172407
##
## 20 variables and 20 observations.
#Graph to find the breaks
plot(fit,type="lines", main = "Correlation of terms between Parks")
###6.5 Validation of the model
#Measure the relation between the parks with Chi2
chisq <- chisq.test(parks_congruency)
chisq
##
## Pearson's Chi-squared test
##
## data: parks_congruency
## X-squared = 22791483, df = 361, p-value < 2.2e-16
###6.7 Application of the correspondence analysis method
#the method CA is always applied when there is correlation
res.ca <- CA(parks_congruency, graph = FALSE)
summary(res.ca)
##
## Call:
## CA(X = parks_congruency, graph = FALSE)
##
## The chi square of independence between the two variables is equal to 22791483 (p-value = 0 ).
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 0.103 0.072 0.036 0.027 0.025 0.014 0.009
## % of var. 33.180 23.133 11.533 8.720 8.093 4.383 2.766
## Cumulative % of var. 33.180 56.312 67.845 76.565 84.658 89.041 91.807
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 0.008 0.007 0.004 0.002 0.002 0.002 0.001
## % of var. 2.532 2.103 1.211 0.747 0.690 0.574 0.162
## Cumulative % of var. 94.338 96.442 97.653 98.400 99.090 99.664 99.826
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19
## Variance 0.000 0.000 0.000 0.000 0.000
## % of var. 0.104 0.029 0.024 0.016 0.000
## Cumulative % of var. 99.930 99.960 99.983 100.000 100.000
##
## Rows (the 10 first)
## Iner*1000 Dim.1 ctr cos2
## Jurapark Aargau | 3.285 | 0.022 0.015 0.005
## Parc naturel régional Gruyère Pays-d’Enhaut | 41.278 | 0.442 30.413 0.762
## Parc Jura vaudois | 66.629 | -0.634 46.557 0.722
## Schweizerischer Nationalpark | 1.917 | -0.105 0.126 0.068
## Parc Ela | 29.152 | -0.143 1.837 0.065
## Naturpark Gantrisch | 16.781 | 0.005 0.003 0.000
## Parc naturel régional de la Vallée du Trient | 6.781 | -0.136 0.747 0.114
## Naturpark Pfyn-Finges | 7.813 | -0.070 0.226 0.030
## Landschaftspark Binntal | 3.381 | -0.106 0.226 0.069
## Naturpark Thal | 8.415 | -0.179 0.986 0.121
## Dim.2 ctr cos2 Dim.3
## Jurapark Aargau | -0.025 0.026 0.006 | -0.007
## Parc naturel régional Gruyère Pays-d’Enhaut | -0.045 0.446 0.008 | 0.073
## Parc Jura vaudois | -0.351 20.439 0.221 | 0.104
## Schweizerischer Nationalpark | 0.289 1.375 0.517 | 0.009
## Parc Ela | 0.262 8.828 0.218 | -0.433
## Naturpark Gantrisch | 0.110 1.670 0.072 | -0.031
## Parc naturel régional de la Vallée du Trient | 0.169 1.673 0.178 | -0.015
## Naturpark Pfyn-Finges | 0.226 3.394 0.313 | 0.016
## Landschaftspark Binntal | 0.254 1.853 0.395 | 0.026
## Naturpark Thal | -0.371 6.090 0.522 | -0.026
## ctr cos2
## Jurapark Aargau 0.004 0.000 |
## Parc naturel régional Gruyère Pays-d’Enhaut 2.360 0.021 |
## Parc Jura vaudois 3.614 0.019 |
## Schweizerischer Nationalpark 0.003 0.000 |
## Parc Ela 48.352 0.596 |
## Naturpark Gantrisch 0.270 0.006 |
## Parc naturel régional de la Vallée du Trient 0.027 0.001 |
## Naturpark Pfyn-Finges 0.033 0.002 |
## Landschaftspark Binntal 0.040 0.004 |
## Naturpark Thal 0.058 0.002 |
##
## Columns (the 10 first)
## Iner*1000 Dim.1 ctr cos2
## Jurapark Aargau | 3.285 | -0.022 0.015 0.005
## Parc naturel régional Gruyère Pays-d’Enhaut | 41.278 | -0.442 30.413 0.762
## Parc Jura vaudois | 66.629 | 0.634 46.557 0.722
## Schweizerischer Nationalpark | 1.917 | 0.105 0.126 0.068
## Parc Ela | 29.152 | 0.143 1.837 0.065
## Naturpark Gantrisch | 16.781 | -0.005 0.003 0.000
## Parc naturel régional de la Vallée du Trient | 6.781 | 0.136 0.747 0.114
## Naturpark Pfyn-Finges | 7.813 | 0.070 0.226 0.030
## Landschaftspark Binntal | 3.381 | 0.106 0.226 0.069
## Naturpark Thal | 8.415 | 0.179 0.986 0.121
## Dim.2 ctr cos2 Dim.3
## Jurapark Aargau | -0.025 0.026 0.006 | 0.007
## Parc naturel régional Gruyère Pays-d’Enhaut | -0.045 0.446 0.008 | -0.073
## Parc Jura vaudois | -0.351 20.439 0.221 | -0.104
## Schweizerischer Nationalpark | 0.289 1.375 0.517 | -0.009
## Parc Ela | 0.262 8.828 0.218 | 0.433
## Naturpark Gantrisch | 0.110 1.670 0.072 | 0.031
## Parc naturel régional de la Vallée du Trient | 0.169 1.673 0.178 | 0.015
## Naturpark Pfyn-Finges | 0.226 3.394 0.313 | -0.016
## Landschaftspark Binntal | 0.254 1.853 0.395 | -0.026
## Naturpark Thal | -0.371 6.090 0.522 | 0.026
## ctr cos2
## Jurapark Aargau 0.004 0.000 |
## Parc naturel régional Gruyère Pays-d’Enhaut 2.360 0.021 |
## Parc Jura vaudois 3.614 0.019 |
## Schweizerischer Nationalpark 0.003 0.000 |
## Parc Ela 48.352 0.596 |
## Naturpark Gantrisch 0.270 0.006 |
## Parc naturel régional de la Vallée du Trient 0.027 0.001 |
## Naturpark Pfyn-Finges 0.033 0.002 |
## Landschaftspark Binntal 0.040 0.004 |
## Naturpark Thal 0.058 0.002 |
###6.8 Calculation of the eigen values
#Calculation of the dimensions (Park) mean value
eig.val <- get_eigenvalue (res.ca)
eig.val
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 0.103375010817 33.1795804543 33.17958
## Dim.2 0.072072480385 23.1326182466 56.31220
## Dim.3 0.035931008923 11.5325337522 67.84473
## Dim.4 0.027168559449 8.7201094050 76.56484
## Dim.5 0.025213842341 8.0927170301 84.65756
## Dim.6 0.013656477476 4.3832275281 89.04079
## Dim.7 0.008617470371 2.7658913817 91.80668
## Dim.8 0.007887989982 2.5317549781 94.33843
## Dim.9 0.006552761289 2.1031956241 96.44163
## Dim.10 0.003774072406 1.2113385823 97.65297
## Dim.11 0.002326489777 0.7467177426 98.39968
## Dim.12 0.002149925393 0.6900470625 99.08973
## Dim.13 0.001789479121 0.5743570521 99.66409
## Dim.14 0.000505604670 0.1622805231 99.82637
## Dim.15 0.000323681527 0.1038898780 99.93026
## Dim.16 0.000091821403 0.0294712967 99.95973
## Dim.17 0.000073960919 0.0237387375 99.98347
## Dim.18 0.000050085307 0.0160755434 99.99954
## Dim.19 0.000001418173 0.0004551815 100.00000
###6.9 Visualization of the correspondence analysis
#Graph of the alignment of the Parks, where the column profiles must be presented in row space or vice-versa.
#It is used the row eigen values to draw, and in the color (clusters) the grade of contribution
#The contribution is how much the Park is related with the represented factor (display of Dim1 and Dim2).
fviz_ca_row(res.ca, col.row= "contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE,
title = "Correspondence Analysis of Parks (Contribution of Row)",
shape.row = 2,
labelsize = 3,
pointsize = 2)
###6.10 Visualization of the correlation between dimensions
#Factorial contribution by dimensions per Park
corrplot(res.ca$row$cos2, is.corr=TRUE,
tl.col = "black",
cl.align.text = "l",
tl.cex=0.9,
main="Correspondence of Parks by dimensions",
col=COL1('YlOrBr', 200))
# Visualize row contributions on axes 1 in Dim1
fviz_contrib(res.ca, choice ="row", axes = 1)
# Visualize row contributions on axes 1 in Dim2
fviz_contrib(res.ca, choice ="row", axes = 2)
#Export the summary of the analysis
write.infile(res.ca, "ca.csv", sep = ";")